import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.offline as poff
# Plot styling: ggplot theme for matplotlib, and Plotly's offline notebook
# mode so that figures can be shown in the notebook.
plt.style.use('ggplot')
poff.init_notebook_mode()

# Load the Udemy course catalogue from the local CSV file.
courses_df = pd.read_csv('data/udemy_courses.csv')

# Preview five randomly chosen rows, renumbered from 0, with a dark theme.
# The styled frame is left as a bare expression so a notebook renders it.
(
    courses_df
    .sample(5)
    .reset_index(drop=True)
    .style
    .set_properties(
        **{
            'background-color': '#161717',
            'color': '#30c7e6',
            'border-color': '#8b8c8c',
        }
    )
)
| | course_id | course_title | url | is_paid | price | num_subscribers | num_reviews | num_lectures | level | content_duration | published_timestamp | subject |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 615920 | In Depth Web Development Made Easy | https://www.udemy.com/in-depth-web-development-from-scratch/ | True | 25 | 3249 | 27 | 60 | All Levels | 6.500000 | 2015-10-30T16:55:44Z | Web Development |
| 1 | 302484 | A Practical Guide to Wealth Creation | https://www.udemy.com/wealth-creation/ | True | 35 | 6 | 0 | 7 | All Levels | 1.000000 | 2016-11-18T14:51:43Z | Business Finance |
| 2 | 1144288 | Poldark TV series Soundtracks for Piano-Quicklessons | https://www.udemy.com/poldark-tv-series-soundtracks-for-piano-quicklessons/ | True | 20 | 8 | 1 | 9 | Beginner Level | 1.000000 | 2017-03-16T22:39:09Z | Musical Instruments |
| 3 | 1186514 | Learn to Play Harp: Beginners Course | https://www.udemy.com/learn-to-play-harp-beginners-course/ | False | 0 | 408 | 7 | 10 | Beginner Level | 0.683333 | 2017-04-28T17:08:37Z | Musical Instruments |
| 4 | 1029440 | Modern Rock Guitar Techniques | https://www.udemy.com/modern-rock-guitar-techniques/ | True | 50 | 3157 | 375 | 32 | Intermediate Level | 3.000000 | 2016-12-08T16:21:16Z | Musical Instruments |
# Handle missing values if any: rows with a missing value in any column are
# removed in place.
courses_df.dropna(inplace=True)

# Count the courses in each subject.
# The column names are set explicitly because the default names produced by
# value_counts().reset_index() differ between pandas versions (pandas < 2.0
# yields 'index'/'subject', pandas >= 2.0 yields 'subject'/'count'); relying
# on the defaults makes temp_df['count'] fail on older releases.
temp_df = (
    courses_df['subject']
    .value_counts()
    .rename_axis('subject')
    .reset_index(name='count')
)

# Donut chart (pie with a 70% hole) of the number of courses per subject.
fig = go.Figure(
    data=[
        go.Pie(
            labels=temp_df['subject'],
            values=temp_df['count'],
            hole=.7,
            title='% of Courses by Subject',
            marker_colors=px.colors.sequential.Blues_r,
        )
    ]
)
fig.update_layout(title='Amount of Courses by Subject')
fig.show()
# Check if there are any duplicate rows
print("Number of duplicate rows:", courses_df.duplicated().sum())
# Remove the fully identical rows that were just counted (the first occurrence
# of each is kept). Left in place, the same course would be present several
# times in everything computed from courses_df afterwards.
courses_df.drop_duplicates(inplace=True)
Number of duplicate rows: 6
# Remove the identifier and link columns in place; they are not used by the
# code that follows.
courses_df.drop(columns=['course_id', 'url'], inplace=True)

# Show five random rows of the trimmed dataset, renumbered from 0, with the
# dark theme. Kept as a bare expression so a notebook renders it.
(
    courses_df
    .sample(5)
    .reset_index(drop=True)
    .style
    .set_properties(
        **{
            'background-color': '#161717',
            'color': '#30c7e6',
            'border-color': '#8b8c8c',
        }
    )
)
| | course_title | is_paid | price | num_subscribers | num_reviews | num_lectures | level | content_duration | published_timestamp | subject |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Docker Technologies for DevOps and Developers | True | 30 | 19210 | 3137 | 44 | All Levels | 3.000000 | 2016-08-29T19:02:30Z | Web Development |
| 1 | Photoshop the Art of Clipart | True | 20 | 30 | 3 | 22 | Beginner Level | 1.000000 | 2015-07-05T23:38:42Z | Graphic Design |
| 2 | The 7 Day Guitar Method | True | 20 | 213 | 4 | 11 | All Levels | 2.500000 | 2015-10-26T19:19:34Z | Musical Instruments |
| 3 | Rapid Website Design with Bootstrap | True | 200 | 14842 | 131 | 17 | Beginner Level | 1.000000 | 2016-12-02T00:45:36Z | Web Development |
| 4 | LOGO DESIGN IN POWERPOINT | True | 95 | 1046 | 12 | 28 | Beginner Level | 1.000000 | 2016-04-15T18:02:17Z | Graphic Design |
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Build one text field per course: its title, a space, then its subject.
courses_df['combined_text'] = (
    courses_df['course_title'] + ' ' + courses_df['subject']
)

# TF-IDF vectorizer that ignores common English stop words.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the combined text and vectorize every course;
# row i of tfidf_matrix corresponds to the i-th row of courses_df.
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['combined_text'])

# Pairwise course-to-course similarity (the matrix compared with itself).
# NOTE(review): cosine_sim is not read by any code visible here, and it is a
# dense N x N array -- confirm it is needed before keeping it.
cosine_sim = linear_kernel(tfidf_matrix)
# Function to get recommendations based on search query keywords
def get_recommendations(search_query, top_n=10):
    """Return the titles of the courses that best match a free-text query.

    The query is vectorized with the already fitted ``tfidf_vectorizer`` and
    scored against every row of ``tfidf_matrix`` with ``linear_kernel``.

    Args:
        search_query: Keywords or a phrase describing the wanted course.
        top_n: Maximum number of titles to return. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A list of at most ``top_n`` course titles, best match first. Courses
        with a similarity of zero are left out, so the list is empty when the
        query is blank or shares no term with any course.
    """
    # Nothing to match against: avoid handing None or whitespace to the
    # vectorizer.
    if not search_query or not search_query.strip():
        return []

    # Transform the search query using the fitted TF-IDF vectorizer.
    query_vec = tfidf_vectorizer.transform([search_query])
    # One similarity score per course, in tfidf_matrix row order.
    scores = linear_kernel(query_vec, tfidf_matrix).flatten()

    # sorted() is stable, so equally scored courses keep their original order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # Keep only real matches. Without the score check, a query that matches
    # fewer than top_n courses was padded with unrelated zero-score courses.
    limit = max(top_n, 0)
    best_positions = [pos for pos, score in ranked[:limit] if score > 0]

    # Positions, not index labels: matrix rows follow the dataframe row order.
    return list(courses_df['course_title'].iloc[best_positions])
# Demo: look up courses for a sample query and list them one per line.
search_query = "How to trade"
recommendations = get_recommendations(search_query)

print("Recommended courses based on search query:\n\n", search_query)
for recommended_title in recommendations:
    print(recommended_title)
Recommended courses based on search query: How to trade How to trade options Learn to Trade for Profit: Find and Trade Winning Stocks Learn to Trade Forex Big U-Turn Trade How to trade in the Forex market Learn To Trade The Forex Naked Price Action Acapulco Trade Learn to Trade Forex Naked Price Action Wammie Trade Day Trading - Learn to Day Trade / Swing Trade In One Hour. Trade for Profit: Find-Trade Stocks Successfully Arabic عربي WE WILL PAY YOU TO TRADE WITH US Trade for a Living
# Initialize feedback for all courses with 0.
# A float is used on purpose: ratings are later replaced by averages such as
# (0 + 3) / 2 = 1.5, and writing a float into an integer column is a
# deprecated silent upcast in recent pandas releases.
courses_df['rating'] = 0.0
# Function to collect user feedback and suggest alternative courses if rating is low
def collect_feedback(course_title, rating):
    """Store a user's rating for a course and suggest alternatives if it is low.

    The stored rating of every row whose title equals ``course_title`` is
    replaced by the mean of the previously stored rating (taken from the first
    matching row) and ``rating``. When ``rating`` is below 3, the titles
    returned by ``get_recommendations(course_title)`` are printed, leaving out
    the rated course itself.

    Args:
        course_title: Exact title of the course being rated.
        rating: The user's numeric rating.

    Raises:
        KeyError: If no row of ``courses_df`` has the given title.
    """
    is_rated_course = courses_df['course_title'] == course_title
    if not is_rated_course.any():
        raise KeyError(
            "No course titled {!r} in courses_df".format(course_title)
        )

    # .iloc[0] picks the first matching row by position. The previous `[0]`
    # was a label lookup and only worked for the row whose index label is 0.
    existing_rating = courses_df.loc[is_rated_course, 'rating'].iloc[0]
    updated_rating = (existing_rating + rating) / 2
    courses_df.loc[is_rated_course, 'rating'] = updated_rating

    # Check if rating is low (below threshold)
    low_rating_threshold = 3
    if rating < low_rating_threshold:
        # Get recommendations for alternative courses based on the course title
        alternative_courses = get_recommendations(course_title)
        print("Your rating for the course '{}' is low. Here are some alternative courses you may like:\n".format(course_title))
        # Print the alternatives just computed (previously the unrelated
        # module-level `recommendations` list was printed by mistake), and
        # skip the rated course by title rather than assuming it comes first.
        for course in alternative_courses:
            if course != course_title:
                print(course)
# Demo: submit a low rating (2) for one course, which also prints
# alternative course suggestions.
collect_feedback(course_title='Ultimate Investment Banking Course', rating=2)
Your rating for the course 'Ultimate Investment Banking Course' is low. Here are some alternative courses you may like: Learn to Trade for Profit: Find and Trade Winning Stocks Learn to Trade Forex Big U-Turn Trade How to trade in the Forex market Learn To Trade The Forex Naked Price Action Acapulco Trade Learn to Trade Forex Naked Price Action Wammie Trade Day Trading - Learn to Day Trade / Swing Trade In One Hour. Trade for Profit: Find-Trade Stocks Successfully Arabic عربي WE WILL PAY YOU TO TRADE WITH US Trade for a Living